library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(ggtree)
## ggtree v3.10.1 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
## 
## Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan
## Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data
## object for visualization of a phylogenetic tree and annotation data.
## iMeta 2022, 1(4):e56. doi:10.1002/imt2.56
## 
## Attaching package: 'ggtree'
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
library(TDbook) 
library(ggimage)
library(rphylopic)
## You are using rphylopic v.1.4.0. Please remember to credit PhyloPic contributors (hint: `get_attribution()`) and cite rphylopic in your work (hint: `citation("rphylopic")`).
## 
## Attaching package: 'rphylopic'
## 
## The following object is masked from 'package:ggimage':
## 
##     geom_phylopic
library(treeio)
## treeio v1.26.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
## 
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
## 
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
library(tidytree)
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
## 
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 
## Attaching package: 'tidytree'
## 
## The following object is masked from 'package:treeio':
## 
##     getNodeNum
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(ape)
## 
## Attaching package: 'ape'
## 
## The following objects are masked from 'package:tidytree':
## 
##     drop.tip, keep.tip
## 
## The following object is masked from 'package:treeio':
## 
##     drop.tip
## 
## The following object is masked from 'package:ggtree':
## 
##     rotate
## 
## The following object is masked from 'package:dplyr':
## 
##     where
library(TreeTools)
## 
## Attaching package: 'TreeTools'
## 
## The following object is masked from 'package:tidytree':
## 
##     MRCA
## 
## The following object is masked from 'package:treeio':
## 
##     MRCA
## 
## The following object is masked from 'package:ggtree':
## 
##     MRCA
library(phytools)
## Loading required package: maps
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
## 
## 
## Attaching package: 'phytools'
## 
## The following object is masked from 'package:TreeTools':
## 
##     as.multiPhylo
## 
## The following object is masked from 'package:treeio':
## 
##     read.newick
library(ggnewscale)
library(ggtreeExtra)
## ggtreeExtra v1.12.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
library(ggstar)
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
# Import the NEON bin table (GOLD study Gs0161344 export) from CSV.
NEON_MAGs <- read_csv(file = "data/NEON/GOLD_Study_ID_Gs0161344_NEON.csv")
## Rows: 1754 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): Bin ID, Genome Name, Bin Quality, Bin Lineage, GTDB-Tk Taxonomy L...
## dbl  (10): IMG Genome ID, Bin Completeness, Bin Contamination, Total Number ...
## date  (1): Date Added
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the imported table.
NEON_MAGs %>%
  head()
## # A tibble: 6 × 19
##   `Bin ID`      `Genome Name`        `IMG Genome ID` `Bin Quality` `Bin Lineage`
##   <chr>         <chr>                          <dbl> <chr>         <chr>        
## 1 3300060643_14 Terrestrial soil mi…      3300060643 MQ            <NA>         
## 2 3300060643_16 Terrestrial soil mi…      3300060643 MQ            Bacteria     
## 3 3300060643_18 Terrestrial soil mi…      3300060643 MQ            Bacteria; Ac…
## 4 3300060643_2  Terrestrial soil mi…      3300060643 MQ            Bacteria; Ac…
## 5 3300060643_28 Terrestrial soil mi…      3300060643 MQ            Bacteria; Ps…
## 6 3300060643_35 Terrestrial soil mi…      3300060643 MQ            Bacteria; Ac…
## # ℹ 14 more variables: `GTDB-Tk Taxonomy Lineage` <chr>, `Bin Methods` <chr>,
## #   `Created By` <chr>, `Date Added` <date>, `Bin Completeness` <dbl>,
## #   `Bin Contamination` <dbl>, `Total Number of Bases` <dbl>, `5s rRNA` <dbl>,
## #   `16s rRNA` <dbl>, `23s rRNA` <dbl>, `tRNA Genes` <dbl>, `Gene Count` <dbl>,
## #   `Scaffold Count` <dbl>, `GOLD Study ID` <chr>
# Show the column names, types and first values of the imported table.
NEON_MAGs %>%
  str()
## spc_tbl_ [1,754 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Bin ID                  : chr [1:1754] "3300060643_14" "3300060643_16" "3300060643_18" "3300060643_2" ...
##  $ Genome Name             : chr [1:1754] "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" ...
##  $ IMG Genome ID           : num [1:1754] 3.3e+09 3.3e+09 3.3e+09 3.3e+09 3.3e+09 ...
##  $ Bin Quality             : chr [1:1754] "MQ" "MQ" "MQ" "MQ" ...
##  $ Bin Lineage             : chr [1:1754] NA "Bacteria" "Bacteria; Actinomycetota; Actinomycetes" "Bacteria; Actinomycetota; Actinomycetes" ...
##  $ GTDB-Tk Taxonomy Lineage: chr [1:1754] "Bacteria; Acidobacteriota; Blastocatellia; Pyrinomonadales; Pyrinomonadaceae; PSRF01" "Bacteria; Acidobacteriota; Vicinamibacteria; Vicinamibacterales; UBA2999; Gp6-AA45" "Bacteria; Actinobacteriota; Actinomycetia; Streptosporangiales; Streptosporangiaceae; Chersky-822" "Bacteria; Actinobacteriota; Actinomycetia; Mycobacteriales; Jatrophihabitantaceae; JAFAWL01" ...
##  $ Bin Methods             : chr [1:1754] "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" ...
##  $ Created By              : chr [1:1754] "IMG_PIPELINE" "IMG_PIPELINE" "IMG_PIPELINE" "IMG_PIPELINE" ...
##  $ Date Added              : Date[1:1754], format: "2023-04-06" "2023-04-06" ...
##  $ Bin Completeness        : num [1:1754] 96.2 77.5 77.2 58.4 68.7 ...
##  $ Bin Contamination       : num [1:1754] 2.56 5.3 1.99 3.74 4.67 0 2.97 3.16 1.71 5.17 ...
##  $ Total Number of Bases   : num [1:1754] 6247032 5394623 4389455 3228217 3245901 ...
##  $ 5s rRNA                 : num [1:1754] 0 0 0 0 0 1 3 0 1 0 ...
##  $ 16s rRNA                : num [1:1754] 1 0 0 0 0 0 1 1 0 0 ...
##  $ 23s rRNA                : num [1:1754] 0 0 0 0 0 1 1 0 1 0 ...
##  $ tRNA Genes              : num [1:1754] 54 32 35 29 12 26 24 37 47 34 ...
##  $ Gene Count              : num [1:1754] 5373 5406 4705 3762 3446 ...
##  $ Scaffold Count          : num [1:1754] 39 878 607 592 474 386 270 547 10 186 ...
##  $ GOLD Study ID           : chr [1:1754] "Gs0161344" "Gs0161344" "Gs0161344" "Gs0161344" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Bin ID` = col_character(),
##   ..   `Genome Name` = col_character(),
##   ..   `IMG Genome ID` = col_double(),
##   ..   `Bin Quality` = col_character(),
##   ..   `Bin Lineage` = col_character(),
##   ..   `GTDB-Tk Taxonomy Lineage` = col_character(),
##   ..   `Bin Methods` = col_character(),
##   ..   `Created By` = col_character(),
##   ..   `Date Added` = col_date(format = ""),
##   ..   `Bin Completeness` = col_double(),
##   ..   `Bin Contamination` = col_double(),
##   ..   `Total Number of Bases` = col_double(),
##   ..   `5s rRNA` = col_double(),
##   ..   `16s rRNA` = col_double(),
##   ..   `23s rRNA` = col_double(),
##   ..   `tRNA Genes` = col_double(),
##   ..   `Gene Count` = col_double(),
##   ..   `Scaffold Count` = col_double(),
##   ..   `GOLD Study ID` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Keep only the rows whose Genome Name is not exactly "NEON combined assembly".
NEON_MAGs_Ind <- filter(NEON_MAGs, `Genome Name` != "NEON combined assembly")

# Split the GTDB-Tk lineage string on "; " into one column per taxonomic rank.
# The original lineage column is kept alongside the new ones (remove = FALSE).
NEON_MAGs_Ind_tax <- separate(
  NEON_MAGs_Ind,
  col = `GTDB-Tk Taxonomy Lineage`,
  into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus"),
  sep = "; ",
  remove = FALSE
)
## Warning: Expected 6 pieces. Additional pieces discarded in 21 rows [12, 32, 66, 79, 80,
## 88, 96, 102, 104, 240, 334, 386, 657, 790, 846, 931, 943, 983, 1041, 1095,
## ...].
## Warning: Expected 6 pieces. Missing pieces filled with `NA` in 282 rows [6, 7, 42, 49,
## 50, 55, 60, 83, 85, 97, 100, 105, 107, 113, 114, 116, 119, 125, 129, 130, ...].
# Tabulate the number of bins per phylum, most frequent first, and render the
# result as a table.
NEON_MAGs_Ind_tax %>%
  count(Phylum, sort = TRUE) %>%
  kable()
Phylum n
Actinobacteriota 418
Proteobacteria 248
Acidobacteriota 181
Verrucomicrobiota 57
NA 38
Chloroflexota 35
Myxococcota 29
Bacteroidota 22
Gemmatimonadota 16
Methylomirabilota 16
Planctomycetota 16
Dormibacterota 11
Eremiobacterota 11
Desulfobacterota_B 9
Desulfobacterota 5
Patescibacteria 5
Tectomicrobia 3
Cyanobacteria 2
Myxococcota_A 2
Armatimonadota 1
Chlamydiota 1
Eisenbacteria 1
Firmicutes 1
Krumholzibacteriota 1
Nitrospirota 1
# Re-import the bin table and tidy it in one pipeline: drop unused columns,
# flag the assembly type, split the GTDB-Tk lineage into ranks, and break the
# genome name into site / sample / subplot / layer / date columns.
NEON_MAGs <- read_csv("data/NEON/GOLD_Study_ID_Gs0161344_NEON.csv") %>%
  # remove columns that are not needed for data analysis
  select(-c(`GOLD Study ID`, `Bin Methods`, `Created By`, `Date Added`)) %>%
  # label each bin "Combined" when its Genome Name is exactly
  # "NEON combined assembly", otherwise "Individual" (a missing Genome Name
  # also falls through to "Individual")
  mutate(
    `Assembly Type` = case_when(
      `Genome Name` == "NEON combined assembly" ~ "Combined",
      TRUE ~ "Individual"
    )
  ) %>%
  # split the lineage on "; " into one column per rank, keeping the original
  separate(
    `GTDB-Tk Taxonomy Lineage`,
    c("Domain", "Phylum", "Class", "Order", "Family", "Genus"),
    "; ",
    remove = FALSE
  ) %>%
  # strip the common prefix "Terrestrial soil microbial communities from "
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`, "Terrestrial soil microbial communities from ", ""
    )
  ) %>%
  # split on " - " into the site description and the sample name; rows with
  # no " - " get an NA Sample Name (tidyr warns about them)
  separate(`Genome Name`, c("Site", "Sample Name"), " - ") %>%
  # strip the common suffix "-comp-1"
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # split the Sample Name at "_" into Site ID and plot info, keeping the
  # original Sample Name column
  separate(
    `Sample Name`, c("Site ID", "subplot.layer.date"), "_",
    remove = FALSE
  ) %>%
  # split the plot info at "-" into 3 columns
  separate(`subplot.layer.date`, c("Subplot", "Layer", "Date"), "-")
## Rows: 1754 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): Bin ID, Genome Name, Bin Quality, Bin Lineage, GTDB-Tk Taxonomy L...
## dbl  (10): IMG Genome ID, Bin Completeness, Bin Contamination, Total Number ...
## date  (1): Date Added
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 6 pieces. Additional pieces discarded in 29 rows [12, 32, 66, 79, 80,
## 88, 96, 102, 104, 240, 334, 386, 657, 790, 846, 931, 943, 983, 1041, 1095,
## ...].
## Warning: Expected 6 pieces. Missing pieces filled with `NA` in 429 rows [6, 7, 42, 49,
## 50, 55, 60, 83, 85, 97, 100, 105, 107, 113, 114, 116, 119, 125, 129, 130, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 624 rows [1131, 1132,
## 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145,
## 1146, 1147, 1148, 1149, 1150, ...].
# Bins classified in the domain Bacteria that come from an individual
# (non-combined) assembly. Both conditions must hold for a row to be kept.
NEON_MAGs_bact_ind <- filter(
  NEON_MAGs,
  Domain == "Bacteria",
  `Assembly Type` == "Individual"
)
# Number of bins per phylum, drawn as horizontal bars.
ggplot(NEON_MAGs_bact_ind, aes(x = Phylum)) +
  geom_bar() +
  coord_flip()

# Number of bins per phylum, stacked by site. fct_infreq() orders the phyla by
# frequency and fct_rev() reverses that order before the axes are flipped.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = fct_rev(fct_infreq(Phylum)), fill = Site)
) +
  geom_bar() +
  coord_flip()

# Distribution of Total Number of Bases per phylum (phyla ordered by
# frequency); x-axis labels are rotated 45 degrees.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = fct_infreq(Phylum), y = `Total Number of Bases`)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Number of bins per site, stacked by phylum, drawn as horizontal bars.
ggplot(NEON_MAGs_bact_ind, aes(x = Site, fill = Phylum)) +
  geom_bar() +
  coord_flip()

# Gene Count against Total Number of Bases, coloured by phylum. The axes are
# flipped, so Total Number of Bases ends up on the vertical axis.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = `Total Number of Bases`, y = `Gene Count`, color = Phylum)
) +
  geom_point() +
  coord_flip()

# Bins whose Site contains the Guanica State Forest site description.
NEON_MAGs_GSF <- filter(
  NEON_MAGs,
  str_detect(Site, "Guanica State Forest and Biosphere Reserve, Puerto Rico")
)

# Bins whose GTDB-Tk lineage string mentions Dormibacterota.
NEON_MAGs_D <- filter(
  NEON_MAGs,
  str_detect(`GTDB-Tk Taxonomy Lineage`, "Dormibacterota")
)

# Number of Guanica bins per Bin Lineage value, drawn as horizontal bars.
ggplot(NEON_MAGs_GSF, aes(x = `Bin Lineage`)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Bin Lineage Counts")

# Import the exported IMG data table, rename its name column to `Genome Name`,
# and drop every row whose name contains "re-annotation" or "WREF plot".
NEON_metagenomes <- read_tsv("data/NEON/exported_img_data_Gs0161344_NEON.tsv") %>%
  rename(`Genome Name` = `Genome Name / Sample Name`) %>%
  # negate = TRUE keeps the rows that do NOT match the pattern; TRUE is spelled
  # out because `T` is an ordinary variable that can be reassigned
  filter(str_detect(`Genome Name`, "re-annotation", negate = TRUE)) %>%
  filter(str_detect(`Genome Name`, "WREF plot", negate = TRUE))
## Rows: 176 Columns: 46
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Domain, Sequencing Status, Study Name, Genome Name / Sample Name, ...
## dbl (16): taxon_oid, IMG Genome ID, Depth In Meters, Elevation In Meters, Ge...
## lgl (12): Altitude In Meters, Chlorophyll Concentration, Longhurst Code, Lon...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Break the metagenome Genome Name into site, sample, subplot, layer and date
# columns.
NEON_metagenomes <- NEON_metagenomes %>%
  # strip the common prefix "Terrestrial soil microbial communities from "
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`, "Terrestrial soil microbial communities from ", ""
    )
  ) %>%
  # split on " - " into the site description and the sample name; rows with
  # no " - " get an NA Sample Name (tidyr warns about them)
  separate(`Genome Name`, c("Site", "Sample Name"), " - ") %>%
  # strip the common suffix "-comp-1"
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # split the Sample Name at "_" into Site ID and plot info, keeping the
  # original Sample Name column
  separate(
    `Sample Name`, c("Site ID", "subplot.layer.date"), "_",
    remove = FALSE
  ) %>%
  # split the plot info at "-" into 3 columns
  separate(`subplot.layer.date`, c("Subplot", "Layer", "Date"), "-")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [53].
# Import the soil chemistry metadata and strip "-COMP" from genomicsSampleID;
# that column is later used as a join key against `Sample Name`.
NEON_chemistry <- read_tsv("data/NEON/neon_plot_soilChem1_metadata.tsv") %>%
  mutate(genomicsSampleID = str_replace(genomicsSampleID, "-COMP", ""))
## Rows: 87 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr   (5): genomicsSampleID, siteID, plotID, nlcdClass, horizon
## dbl  (11): decimalLatitude, decimalLongitude, elevation, soilTemp, d15N, org...
## date  (1): collectionDate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the metagenome table and then the soil chemistry table to every bin,
# matching on the sample name. Columns that exist in both NEON_MAGs and
# NEON_metagenomes come out with .x / .y suffixes (hence `Site.x` below).
NEON_FULL <- left_join(NEON_MAGs, NEON_metagenomes, by = "Sample Name") %>%
  left_join(NEON_chemistry, by = c("Sample Name" = "genomicsSampleID"))

# Rows of the joined table whose Phylum contains "Dormibacterota".
NEON_FULL_D <- filter(NEON_FULL, str_detect(Phylum, "Dormibacterota"))

# soilInWaterpH per site for the Dormibacterota rows; x-axis labels are
# rotated 50 degrees.
ggplot(NEON_FULL_D, aes(x = Site.x, y = soilInWaterpH)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Distribution of Bin Contamination across the Dormibacterota rows.
# `Bin Contamination` is a continuous (double) column, so it is binned with
# geom_histogram(); geom_bar() would draw one bar per distinct value.
# binwidth = 1 groups values into 1-unit-wide bins starting at 0.
NEON_FULL_D %>%
  ggplot(aes(x = `Bin Contamination`)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  labs(title = "Bin Contamination Counts")

# Read the decorated GTDB-Tk trees (ar53 and bac120 files).
tree_arc <- read.tree("data/NEON/gtdbtk.ar53.decorated.tree")
tree_bac <- read.tree("data/NEON/gtdbtk.bac120.decorated.tree")

# All labels of the bacterial tree in one vector: tip labels first, then node
# labels. A label's position in this vector is used below as its node number.
node_vector_bac <- c(tree_bac$tip.label, tree_bac$node.label)

# Show every label that mentions Dormibacterota.
grep("Dormibacterota", node_vector_bac, value = TRUE)
## [1] "'1.0:p__Dormibacterota; c__Dormibacteria'"
# Position of the Dormibacterota label(s) within the combined label vector.
match(
  x = grep(pattern = "Dormibacterota", x = node_vector_bac, value = TRUE),
  table = node_vector_bac
)
## [1] 1767
# Bin table joined with the metagenome and soil chemistry tables on the sample
# name, with `Bin ID` renamed to `label`.
# Presumably the rename lets the table be attached to a tree by tip label with
# %<+% — confirm against the ggtree documentation.
NEON_MAGs_metagenomes_chemistry <-
  left_join(NEON_MAGs, NEON_metagenomes, by = "Sample Name") %>%
  left_join(NEON_chemistry, by = c("Sample Name" = "genomicsSampleID")) %>%
  rename(label = `Bin ID`)
# Reorder the bacterial tree with Preorder() and cut out the subtree rooted at
# node 1767.
# NOTE(review): 1767 was looked up in tree_bac's label vector, not in the
# reordered tree; if Preorder() renumbers nodes this may select a different
# clade — confirm the subtree is the Dormibacterota clade.
tree_bac_preorder <- Preorder(tree = tree_bac)
tree_Dormibacterota <- Subtree(tree = tree_bac_preorder, node = 1767)

# Rows of the joined table assigned to the phylum Dormibacterota.
NEON_MAGs_Dormibacterota <- filter(
  NEON_MAGs_metagenomes_chemistry,
  Phylum == "Dormibacterota"
)
# Circular view of the bacterial tree, drawn without branch lengths, with
# three clades highlighted and labelled. Node numbers are hard-coded.
# NOTE(review): 1767 is the index printed for the Dormibacterota label; 1789
# and 2673 are not derived anywhere in the visible code — confirm they are the
# intended Actinomycetota / Acidobacteriota nodes.
ggtree(tree_bac, layout = "circular", branch.length = "none") +
  geom_hilight(node = 1767, fill = "steelblue", alpha = 0.6) +
  geom_cladelab(
    node = 1767,
    label = "Dormibacterota",
    align = TRUE,
    offset = 0,
    textcolor = "steelblue",
    barcolor = "steelblue"
  ) +
  geom_hilight(node = 1789, fill = "darkgreen", alpha = 0.6) +
  geom_cladelab(
    node = 1789,
    label = "Actinomycetota",
    align = TRUE,
    vjust = -0.4,
    offset = 0,
    textcolor = "darkgreen",
    barcolor = "darkgreen"
  ) +
  geom_hilight(node = 2673, fill = "darkorange", alpha = 0.6) +
  geom_cladelab(
    node = 2673,
    label = "Acidobacteriota",
    align = TRUE,
    hjust = 1.1,
    offset = 0,
    textcolor = "darkorange",
    barcolor = "darkorange"
  )

# Copy of the joined table in which five columns get names without spaces.
NEON_MAGs_metagenomes_chemistry_noblank <- rename(
  NEON_MAGs_metagenomes_chemistry,
  AssemblyType = `Assembly Type`,
  BinCompleteness = `Bin Completeness`,
  BinContamination = `Bin Contamination`,
  TotalNumberofBases = `Total Number of Bases`,
  EcosystemSubtype = `Ecosystem Subtype`
)

# Dormibacterota subtree with tips coloured by Ecosystem Subtype and two side
# panels: Bin Completeness as points and Bin Contamination as horizontal bars.
ggtree(tree_Dormibacterota) %<+%
  NEON_MAGs_metagenomes_chemistry +
  geom_tippoint(mapping = aes(colour = `Ecosystem Subtype`)) +
  # geom_facet() is given the copy of the table whose column names contain no
  # spaces; per the original author's note it does not accept blanks in names
  geom_facet(
    panel = "Bin Completeness",
    data = NEON_MAGs_metagenomes_chemistry_noblank,
    geom = geom_point,
    mapping = aes(x = BinCompleteness)
  ) +
  geom_facet(
    panel = "Bin Contamination",
    data = NEON_MAGs_metagenomes_chemistry_noblank,
    geom = geom_col,
    mapping = aes(x = BinContamination),
    orientation = "y",
    width = 0.6
  ) +
  # NOTE(review): a numeric legend.position is deprecated as of ggplot2 3.5.0
  # in favour of legend.position.inside — confirm and migrate when convenient
  theme_tree2(legend.position = c(0.1, 0.7))

# Circular Dormibacterota subtree with points coloured by Ecosystem Subtype
# and sized by Total Number of Bases.
ggtree(tree_Dormibacterota, layout = "circular") %<+%
  NEON_MAGs_metagenomes_chemistry +
  geom_point2(
    mapping = aes(color = `Ecosystem Subtype`, size = `Total Number of Bases`)
  )
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point_g_gtree()`).